Stock Price Forecast

Using Technical Analysis and Machine Learning

Stock
Technical Analysis
Machine Learning
Author

Hoang Son Lai

Published

January 23, 2026

Part 1. Technical Analysis

This section provides a comprehensive technical analysis of stock prices using multiple indicators. For each stock, I calculate key metrics:

  1. Trend Indicators:
  • MA50/MA200: 50-day and 200-day Simple Moving Averages compare current price to medium/long-term trends (Price > MA = bullish +1, Price < MA = bearish -1)

  • EMA20: 20-day Exponential Moving Average gives more weight to recent prices for short-term trend direction

  2. Momentum Indicators:
  • RSI (14-day): Measures overbought (>70 = -1) vs. oversold (<30 = +1) conditions, with 30-70 being neutral

  • MACD: Signal-line crossover indicator (MACD line above its signal line = +1 bullish momentum, MACD line below its signal line = -1 bearish momentum)

  3. Volatility & Volume Indicators:
  • Bollinger Bands: Price above upper band = overbought (-1), below lower band = oversold (+1), within bands = neutral (0)

  • MFI (Money Flow Index): Volume-weighted RSI (>80 = -1 overbought, <20 = +1 oversold, 20-80 = neutral)

Each indicator is scored (+1 for bullish, -1 for bearish, 0 for neutral), with a final aggregate signal determining the overall market outlook (Positive, Negative, or Neutral).

Code
# Load necessary libraries
library(tidyverse)
library(dplyr)
library(tidyquant)
library(TTR)
library(xgboost)
library(plotly)
library(gt)
library(gtExtras)
library(readr)

# 1. Load Data
# Assuming the same directory structure
# The CSV is read, the date column is coerced to Date, and the rows are ordered
# by ticker and then by date in a single pipeline.
df <- read_csv("data/cleaned/stock_prices.csv") %>%
  mutate(date = as.Date(date)) %>%
  arrange(ticker, date)
Code
# Append technical indicators to one ticker's price history.
#
# data: data frame with columns high, low, close and volume.
#       NOTE(review): assumed to hold a single ticker ordered oldest to
#       newest -- the function itself does not sort or group.
# Returns `data` with SMA_50, SMA_200, EMA_20, RSI_14 and MFI_14 added,
# followed by the columns produced by MACD() and then by BBands().
calculate_indicators <- function(data) {
  # High/low/close columns, handed to MFI() as one object
  hlc_cols <- select(data, high, low, close)

  # Multi-column indicators are computed first and attached at the end
  macd_tbl <- as_tibble(
    MACD(data$close, nFast = 12, nSlow = 26, nSig = 9, maType = "EMA")
  )
  bands_tbl <- as_tibble(BBands(data$close, n = 20, sd = 2))

  # Single-column indicators
  with_single_cols <- mutate(
    data,
    SMA_50  = SMA(close, n = 50),
    SMA_200 = SMA(close, n = 200),
    EMA_20  = EMA(close, n = 20),
    RSI_14  = RSI(close, n = 14),
    MFI_14  = MFI(hlc_cols, volume = data$volume, n = 14)
  )

  # Attach MACD columns first, then Bollinger Band columns
  with_macd <- bind_cols(with_single_cols, macd_tbl)
  bind_cols(with_macd, bands_tbl)
}

# Apply calculations to all tickers
# Only tickers with more than 200 rows are kept (presumably so the 200-period
# SMA has a full window); indicators are then computed group by group.
eligible_prices <- filter(group_by(df, ticker), n() > 200)
df_ta <- ungroup(
  group_modify(eligible_prices, function(prices, key) calculate_indicators(prices))
)

# Scoring Logic
# Trend-style score: +1 when `value` is above `reference`, -1 when below,
# 0 otherwise (including when either side is NA).
score_vs_reference <- function(value, reference) {
  case_when(value > reference ~ 1, value < reference ~ -1, TRUE ~ 0)
}

# Band-style score: +1 when `value` is below `lower`, -1 when above `upper`,
# 0 otherwise (including when the comparison is NA).
score_outside_band <- function(value, lower, upper) {
  case_when(value < lower ~ 1, value > upper ~ -1, TRUE ~ 0)
}

df_ta <- df_ta %>%
  mutate(
    MA50_Score  = score_vs_reference(close, SMA_50),
    MA200_Score = score_vs_reference(close, SMA_200),
    EMA_Score   = score_vs_reference(close, EMA_20),
    MACD_Score  = score_vs_reference(macd, signal),
    RSI_Score   = score_outside_band(RSI_14, 30, 70),
    BB_Score    = score_outside_band(close, dn, up),
    MFI_Score   = score_outside_band(MFI_14, 20, 80),
    # Aggregate of the seven indicator scores
    Total_Score = MA50_Score + MA200_Score + EMA_Score + MACD_Score +
      RSI_Score + BB_Score + MFI_Score,
    Signal = case_when(
      Total_Score > 0 ~ "Positive",
      Total_Score < 0 ~ "Negative",
      TRUE ~ "Neutral"
    )
  )

# Generate Summary Table
# Keeps, for every ticker, the row(s) dated on that ticker's latest date.
# The result stays grouped by ticker (there is no ungroup() here).
latest_value_cols <- c(
  "ticker", "close", "volume", "SMA_50", "SMA_200", "EMA_20",
  "macd", "RSI_14", "up", "dn", "MFI_14"
)

summary_df <- df_ta %>%
  group_by(ticker) %>%
  filter(date == max(date)) %>%
  select(all_of(latest_value_cols), contains("Score"), Signal)

# Display styled table using gt
# Every continuous indicator column is rounded to two decimals. Previously
# macd, RSI_14 and MFI_14 were left unformatted and printed with 5-8 decimals
# next to 2-decimal prices.
summary_df %>%
  gt() %>%
  tab_header(title = "Technical Analysis Summary - Latest Signals") %>%
  fmt_number(
    columns = c(close, SMA_50, SMA_200, EMA_20, macd, RSI_14, up, dn, MFI_14),
    decimals = 2
  ) %>%
  # Volume is shown with K/M/B suffixes
  fmt_number(columns = volume, suffixing = TRUE) %>%
  # Colour scale on the aggregate score: red (low) -> yellow -> green (high)
  gt_color_rows(Total_Score, palette = c("red", "yellow", "green")) %>%
  # Light green background for Positive signals
  tab_style(
    style = cell_fill(color = "#d4edda"),
    locations = cells_body(columns = Signal, rows = Signal == "Positive")
  ) %>%
  # Light red background for Negative signals
  tab_style(
    style = cell_fill(color = "#f8d7da"),
    locations = cells_body(columns = Signal, rows = Signal == "Negative")
  )
Technical Analysis Summary - Latest Signals
close volume SMA_50 SMA_200 EMA_20 macd RSI_14 up dn MFI_14 MA50_Score MA200_Score EMA_Score MACD_Score RSI_Score BB_Score MFI_Score Total_Score Signal
AAPL
248.68 20.37M 269.78 234.74 259.27 -2.36946399 23.54549 279.58 243.59 25.31303 -1 1 -1 -1 1 0 0 -1 Negative
ADBE
302.73 1.71M 331.50 357.65 318.17 -3.79778065 34.05754 369.17 281.19 32.94964 -1 -1 -1 -1 0 0 0 -4 Negative
AMZN
239.68 17.27M 232.37 220.30 236.04 0.59766662 55.47537 248.51 225.03 61.55384 1 1 1 -1 0 0 0 2 Positive
BAC
51.58 14.33M 53.90 48.30 53.79 -1.23120016 34.56166 58.15 51.02 45.30107 -1 1 -1 -1 0 0 0 -2 Negative
DIS
111.56 4.05M 109.76 110.60 112.44 0.39737151 48.28802 115.90 110.75 59.20544 1 1 -1 -1 0 0 0 0 Neutral
GOOGL
328.39 12.24M 312.72 229.67 323.91 1.80127254 58.32054 339.59 306.99 58.85994 1 1 1 -1 0 0 0 2 Positive
HD
383.07 996.84K 356.24 370.44 368.76 2.17222348 65.62475 395.07 332.06 75.39303 1 1 1 1 0 0 0 4 Positive
JNJ
218.29 2.19M 206.78 176.21 212.80 1.79453697 67.65978 222.64 199.76 60.93761 1 1 1 1 0 0 0 4 Positive
JPM
297.76 4.51M 312.95 288.98 313.03 -1.20877563 34.77893 340.64 296.11 45.10625 -1 1 -1 -1 0 0 0 -2 Negative
KO
72.26 5.75M 70.56 69.11 70.70 0.70710718 62.89778 72.89 67.56 62.38649 1 1 1 1 0 0 0 4 Positive
MA
525.59 1.74M 553.92 561.10 550.38 -1.56460317 30.93978 598.33 519.88 35.03904 -1 -1 -1 -1 0 0 0 -4 Negative
META
661.78 11.57M 639.49 676.81 640.07 -0.82320725 57.21962 683.17 605.76 56.27555 1 -1 1 1 0 0 0 2 Positive
MSFT
469.30 19.93M 480.94 482.82 468.51 -1.67824555 48.27677 498.43 445.76 42.65325 -1 -1 1 -1 0 0 0 -2 Negative
NFLX
86.00 34.57M 97.70 113.01 89.75 -3.64960133 31.64944 96.02 84.00 20.03264 -1 -1 -1 -1 0 0 0 -4 Negative
NVDA
187.94 80.23M 183.83 165.69 184.93 0.08988118 54.29832 191.68 180.92 46.66409 1 1 1 -1 0 0 0 2 Positive
PG
150.58 6.52M 144.97 153.20 145.17 0.73180086 64.85683 150.08 138.06 66.20983 1 -1 1 1 0 -1 0 1 Positive
PYPL
56.91 4.05M 60.13 67.19 57.83 -2.06688642 42.09407 60.67 55.21 35.16651 -1 -1 -1 1 0 0 0 -2 Negative
TSLA
448.26 34.98M 442.38 372.75 444.51 -0.77553181 51.28174 475.57 416.33 55.61414 1 1 1 -1 0 0 0 2 Positive
UNH
355.18 3.11M 331.31 332.55 340.37 1.29486565 63.18486 355.43 323.15 58.84521 1 1 1 1 0 0 0 4 Positive
V
327.00 2.25M 338.03 344.07 336.65 -1.43186372 35.02471 367.72 316.84 40.38805 -1 -1 -1 -1 0 0 0 -4 Negative
WMT
118.40 8.19M 112.18 102.04 116.44 1.71530489 60.19195 122.17 108.92 27.87472 1 1 1 1 0 0 0 4 Positive

Part 2. Machine Learning

1. Overview

Building upon the technical analysis in Part 1, this section utilizes Machine Learning (XGBoost) to predict stock prices. Unlike traditional indicators that give simple Buy/Sell signals, the ML model analyzes the complex relationships between historical patterns (RSI, MACD, Moving Averages) to forecast the closing price of the next trading day.

2. Methodology

  • Algorithm: We use XGBoost (Extreme Gradient Boosting) regression from the R xgboost package, a robust algorithm highly effective for structured time-series data.

  • Feature Engineering: The model inputs include Open, High, Low, Close, Volume, and all technical indicators calculated in Part 1 (SMA, EMA, RSI, MACD, Bollinger Bands, MFI).

  • Training & Validation: To prevent “data leakage” (looking into the future), the data is split chronologically:

    • Training Set (First 80%): Used to teach the model historical patterns.

    • Test Set (Last 20%): Used to evaluate how well the model predicts unseen data.

3. Interactive Analysis Dashboard

The visualization below provides a comprehensive view of the model’s performance. Use the dropdown menu to select a specific ticker:

  • Top Chart (Actual vs. Predicted): Compares the real market price (Blue line) against the model’s prediction (Orange dotted line). The closer the lines, the better the model accuracy.

  • Bottom Chart (Feature Importance): Ranks which technical indicators were most influential in determining the price. For example, if RSI has a high bar, the model relies heavily on momentum to make predictions for that specific stock.

Note: The table below summarizes the prediction for the next upcoming trading day, including the predicted percentage change.

Code
# Prepare features
feature_cols <- c("open", "high", "low", "close", "volume",
                  "SMA_50", "SMA_200", "EMA_20", "RSI_14",
                  "macd", "signal", "up", "mavg", "dn", "pctB", "MFI_14")

# Fewest labelled rows for which the 80/20 chronological split below yields a
# non-empty training set and a non-empty test set.
min_model_rows <- 10L

# Booster settings shared by every ticker
xgb_params <- list(objective = "reg:squarederror", eta = 0.05, max_depth = 5)

ml_results <- list()   # one-row summary per ticker
ticker_preds <- list() # test-set actual vs. predicted next close per ticker

# `sym` rather than `t`, so base::t() is not shadowed
for (sym in unique(df_ta$ticker)) {
  ticker_data <- df_ta %>%
    filter(ticker == sym) %>%
    arrange(date) %>%
    drop_na(all_of(feature_cols)) %>%
    mutate(
      next_close = lead(close),
      # Target is the next-day RETURN, not the price level. Tree ensembles
      # cannot predict beyond the target range seen in training, so a
      # price-level target collapses when the test period trades at new highs.
      target = next_close / close - 1
    )

  # Rows with a known next-day close; the most recent row has none
  labelled <- ticker_data %>% drop_na(target)
  n_obs <- nrow(labelled)

  if (n_obs < min_model_rows) {
    warning("Skipping ", sym, ": only ", n_obs, " usable rows.", call. = FALSE)
    next
  }

  # Chronological split: first 80% train, last 20% test
  split_idx <- floor(0.8 * n_obs)
  train_set <- labelled[seq_len(split_idx), ]
  test_set  <- labelled[seq(split_idx + 1, n_obs), ]

  # XGBoost Matrices
  dtrain <- xgb.DMatrix(
    data = as.matrix(train_set[, feature_cols]),
    label = train_set$target
  )
  dtest <- xgb.DMatrix(data = as.matrix(test_set[, feature_cols]))

  # Train Model
  # xgb.train() is used instead of xgboost(data = <DMatrix>, ...); it takes a
  # DMatrix directly.
  # NOTE(review): newer xgboost releases changed the xgboost() signature --
  # confirm against the installed version.
  model <- xgb.train(
    params = xgb_params,
    data = dtrain,
    nrounds = 100,
    verbose = 0
  )

  # Predict on test set and convert returns back to prices so RMSE stays in
  # price units
  test_pred_price <- test_set$close * (1 + predict(model, dtest))
  rmse_val <- sqrt(mean((test_set$next_close - test_pred_price)^2))

  ticker_preds[[sym]] <- tibble(
    Ticker = sym,
    date = test_set$date, # date the forecast is made; it targets the next row
    Actual_Next_Close = test_set$next_close,
    Predicted_Next_Close = test_pred_price
  )

  # Predict Next Day from the most recent complete feature row
  latest_data <- tail(ticker_data, 1)
  dlatest <- xgb.DMatrix(data = as.matrix(latest_data[, feature_cols]))
  next_ret <- predict(model, dlatest)
  next_pred <- latest_data$close * (1 + next_ret)

  ml_results[[sym]] <- tibble(
    Ticker = sym,
    Current_Price = latest_data$close,
    Predicted_Price = next_pred,
    Change_Pct = next_ret * 100,
    RMSE = rmse_val
  )
}

# Stack the per-ticker one-row summaries into a single table
final_ml_results <- bind_rows(ml_results)

# Display ML Table
# Price-like columns get two decimals; Change_Pct already holds percentage
# points, so it is shown with a % sign without rescaling.
ml_table <- gt(final_ml_results)
ml_table <- fmt_number(
  ml_table,
  columns = c(Current_Price, Predicted_Price, RMSE),
  decimals = 2
)
fmt_percent(ml_table, columns = Change_Pct, scale_values = FALSE)
Ticker Current_Price Predicted_Price Change_Pct RMSE
AAPL 248.68 236.12 −5.05% 18.80
ADBE 302.73 306.16 1.13% 8.27
AMZN 239.68 228.56 −4.64% 6.58
BAC 51.58 43.85 −14.99% 6.28
DIS 111.56 110.37 −1.07% 2.68
GOOGL 328.39 188.99 −42.45% 68.56
HD 383.07 379.56 −0.92% 10.66
JNJ 218.29 157.80 −27.71% 27.65
JPM 297.76 259.16 −12.96% 37.55
KO 72.26 67.56 −6.51% 2.29
MA 525.59 524.06 −0.29% 21.06
META 661.78 656.55 −0.79% 39.42
MSFT 469.30 424.53 −9.54% 64.67
NFLX 86.00 86.38 0.44% 19.34
NVDA 187.94 127.50 −32.16% 43.80
PG 150.58 151.22 0.42% 2.29
PYPL 56.91 56.10 −1.43% 1.57
TSLA 448.26 415.91 −7.22% 31.28
UNH 355.18 417.20 17.46% 96.35
V 327.00 328.23 0.38% 9.99
WMT 118.40 97.62 −17.55% 9.10
Code
# Interactive Plotly Chart for the first ticker as an example
# The ticker is hard-coded to "AAPL"; only the actual closing price is drawn.
aapl_history <- df_ta %>% filter(ticker == "AAPL")

price_fig <- plot_ly(data = aapl_history)
price_fig <- add_lines(price_fig, x = ~date, y = ~close, name = "Actual Price")
layout(
  price_fig,
  title = "Stock Price Overview",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Price")
)